// org.locationtech.geomesa.index.conf.ColumnGroups.scala Maven / Gradle / Ivy
/***********************************************************************
* Copyright (c) 2013-2024 Commonwealth Computer Research, Inc.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Apache License, Version 2.0
* which accompanies this distribution and is available at
* http://www.opensource.org/licenses/apache2.0.php.
***********************************************************************/
package org.locationtech.geomesa.index.conf
import com.github.benmanes.caffeine.cache.Caffeine
import org.geotools.api.feature.simple.SimpleFeatureType
import org.geotools.api.filter.Filter
import org.geotools.feature.simple.SimpleFeatureTypeBuilder
import org.locationtech.geomesa.features.SerializationOption.SerializationOptions
import org.locationtech.geomesa.features.SimpleFeatureSerializer
import org.locationtech.geomesa.features.kryo.{KryoFeatureSerializer, ProjectingKryoFeatureSerializer}
import org.locationtech.geomesa.filter.FilterHelper
import org.locationtech.geomesa.index.metadata.TableBasedMetadata
import org.locationtech.geomesa.utils.cache.CacheKeyGenerator
import org.locationtech.geomesa.utils.geotools.Transform.Transforms
import org.locationtech.geomesa.utils.geotools.{SimpleFeatureTypes, Transform}
import org.locationtech.geomesa.utils.index.VisibilityLevel
import java.nio.charset.StandardCharsets
import java.util.concurrent.TimeUnit
/**
 * Resolves the column groups configured on a simple feature type, along with the serializers
 * and lookups built on top of them. Computed groups are kept in the cache held by the
 * companion object.
 */
class ColumnGroups {

  import org.locationtech.geomesa.filter.RichTransform.RichTransform
  import org.locationtech.geomesa.utils.geotools.RichAttributeDescriptors.RichAttributeDescriptor
  import org.locationtech.geomesa.utils.geotools.RichSimpleFeatureType.RichSimpleFeatureType

  import scala.collection.JavaConverters._

  /**
   * Gets the column groups for a simple feature type.
   *
   * With attribute-level visibility the result is a single entry pairing the attributes group
   * with the full type. Otherwise there is one entry per column group declared on the attribute
   * descriptors (each with a subset type holding only that group's attributes), plus the default
   * group paired with the full type. Entries are ordered by attribute count, then by group name.
   *
   * Results are cached under the key generated from the feature type.
   *
   * @param sft simple feature type
   * @return pairs of (column group name as UTF-8 bytes, feature type for that group)
   */
  def apply(sft: SimpleFeatureType): Seq[(Array[Byte], SimpleFeatureType)] = {
    val cacheKey = CacheKeyGenerator.cacheKey(sft)
    Option(ColumnGroups.cache.getIfPresent(cacheKey)).getOrElse {
      val computed: IndexedSeq[(Array[Byte], SimpleFeatureType)] =
        if (sft.getVisibilityLevel == VisibilityLevel.Attribute) {
          IndexedSeq((ColumnGroups.Attributes, sft))
        } else {
          // one builder per declared group name, filled in attribute-descriptor order
          val builders = scala.collection.mutable.Map.empty[String, SimpleFeatureTypeBuilder]
          for {
            descriptor <- sft.getAttributeDescriptors.asScala
            name <- descriptor.getColumnGroups()
          } {
            builders.getOrElseUpdate(name, new SimpleFeatureTypeBuilder()).add(descriptor)
          }

          val subsets = builders.toIndexedSeq.map { case (name, builder) =>
            builder.setName(sft.getTypeName)
            val subset: SimpleFeatureType =
              SimpleFeatureTypes.immutable(builder.buildFeatureType(), sft.getUserData)
            (name.getBytes(StandardCharsets.UTF_8), subset)
          }

          // smallest groups come first; the group name is the tiebreaker so the order is consistent
          (subsets :+ (ColumnGroups.Default -> sft)).sortBy { case (name, subset) =>
            (subset.getAttributeCount, new String(name, StandardCharsets.UTF_8))
          }
        }
      ColumnGroups.cache.put(cacheKey, computed)
      computed
    }
  }

  /**
   * Get serializers for each column group.
   *
   * The default and attributes groups get a kryo serializer for their feature type; every other
   * group gets a projecting serializer built from the full type and the group's subset type.
   * All serializers are created with the 'without id' serialization options.
   *
   * @param sft simple feature type
   * @return pairs of (column group name as UTF-8 bytes, serializer for that group)
   */
  def serializers(sft: SimpleFeatureType): Seq[(Array[Byte], SimpleFeatureSerializer)] = {
    // reference comparison - built-in groups are identified by the companion object's array instances
    def isBuiltIn(family: Array[Byte]): Boolean =
      family.eq(ColumnGroups.Default) || family.eq(ColumnGroups.Attributes)

    apply(sft).map { case (family, subset) =>
      val serializer: SimpleFeatureSerializer =
        if (isBuiltIn(family)) {
          KryoFeatureSerializer(subset, SerializationOptions.withoutId)
        } else {
          new ProjectingKryoFeatureSerializer(sft, subset, SerializationOptions.withoutId)
        }
      (family, serializer)
    }
  }

  /**
   * Find a column group that supports the given transform and filter.
   *
   * Without a transform, the last group is returned. With a transform, groups are checked in
   * order and the first one whose type has every property needed by the transform and filter
   * is returned; the last group is the fallback and is returned without being checked.
   *
   * @param sft simple feature type
   * @param transform transform definitions
   * @param ecql filter, if any
   * @return the selected (column group name as UTF-8 bytes, feature type) pair
   */
  def group(sft: SimpleFeatureType, transform: Option[String], ecql: Option[Filter]): (Array[Byte], SimpleFeatureType) = {
    val candidates = apply(sft)
    transform match {
      case None => candidates.last

      case Some(spec) =>
        val definitions = Transforms(sft, spec)
        // the last group is expected to hold all the columns, so it is only used as the fallback
        candidates.dropRight(1)
            .find { case (_, subset) => supports(subset, definitions, ecql) }
            .getOrElse(candidates.last)
    }
  }

  /**
   * Validate that the column groups do not overlap with reserved column groups, and that no
   * column groups are declared when attribute-level visibility is in use.
   *
   * @param sft simple feature type
   * @throws IllegalArgumentException if a reserved group name is used, or if groups are combined
   *                                  with attribute-level visibility
   */
  def validate(sft: SimpleFeatureType): Unit = {
    val reserved = Set(ColumnGroups.DefaultString, ColumnGroups.AttributesString)
    val declared = sft.getAttributeDescriptors.asScala.flatMap(_.getColumnGroups()).distinct

    declared.find(reserved.contains).foreach { name =>
      throw new IllegalArgumentException(s"Column group '$name' is reserved for internal use - " +
          "please choose another name")
    }

    val attributeLevel = sft.getVisibilityLevel == VisibilityLevel.Attribute
    if (attributeLevel && declared.nonEmpty) {
      throw new IllegalArgumentException("Column groups are not supported when using attribute-level visibility")
    }
  }

  /**
   * Does the simple feature type contain the fields required to evaluate the transform and filter
   *
   * @param sft simple feature type
   * @param transforms transform definitions
   * @param filter filter
   * @return true if every property referenced by the filter (if any) and by the transforms
   *         is an attribute of the feature type
   */
  private def supports(sft: SimpleFeatureType, transforms: Seq[Transform], filter: Option[Filter]): Boolean = {
    def present(property: String): Boolean = sft.indexOf(property) != -1

    val filterSupported = filter.forall(f => FilterHelper.propertyNames(f, sft).forall(present))
    // transforms are only inspected when the filter check has passed
    filterSupported && transforms.flatMap(_.properties).forall(present)
  }
}
/**
 * Built-in column group names and the cache of computed column groups.
 */
object ColumnGroups {

  // names of the built-in column groups
  private val DefaultString = "d"
  private val AttributesString = "a"

  /** UTF-8 bytes of the default column group name */
  val Default: Array[Byte] = DefaultString.getBytes(StandardCharsets.UTF_8)

  /** UTF-8 bytes of the attributes column group name */
  val Attributes: Array[Byte] = AttributesString.getBytes(StandardCharsets.UTF_8)

  // computed column groups, keyed by a string cache key; entries expire a fixed time after being written
  private val cache = {
    val expiryMillis = TableBasedMetadata.Expiry.toDuration.get.toMillis
    Caffeine.newBuilder()
        .expireAfterWrite(expiryMillis, TimeUnit.MILLISECONDS)
        .build[String, IndexedSeq[(Array[Byte], SimpleFeatureType)]]()
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy